RSSAgent: Include `url` in addition to `urls` in each event.

An Atom/RSS entry usually has just one URL, and even if it has many, only
the first of those is likely to be of interest.

Emitting an event with `url` also helps downstream agents, such as
WebsiteAgent, that use the `url` key to get a URL to crawl.

Akinori MUSHA 10 years ago
parent
commit
96b183bf09
2 changed files with 16 additions and 10 deletions
  1. 12 10
      app/models/agents/rss_agent.rb
  2. 4 0
      spec/models/agents/rss_agent_spec.rb

+ 12 - 10
app/models/agents/rss_agent.rb

@@ -41,6 +41,7 @@ module Agents
41 41
             "id": "829f845279611d7925146725317b868d",
42 42
             "date_published": "2014-09-11 01:30:00 -0700",
43 43
             "last_updated": "Thu, 11 Sep 2014 01:30:00 -0700",
44
+            "url": "http://example.com/...",
44 45
             "urls": [ "http://example.com/..." ],
45 46
             "description": "Some description",
46 47
             "content": "Some content",
@@ -75,16 +76,17 @@ module Agents
75 76
           entry_id = get_entry_id(entry)
76 77
           if check_and_track(entry_id)
77 78
             created_event_count += 1
78
-            create_event(:payload => {
79
-              :id => entry_id,
80
-              :date_published => entry.date_published,
81
-              :last_updated => entry.last_updated,
82
-              :urls => entry.urls,
83
-              :description => entry.description,
84
-              :content => entry.content,
85
-              :title => entry.title,
86
-              :authors => entry.authors,
87
-              :categories => entry.categories
79
+            create_event(payload: {
80
+              id: entry_id,
81
+              date_published: entry.date_published,
82
+              last_updated: entry.last_updated,
83
+              url: entry.url,
84
+              urls: entry.urls,
85
+              description: entry.description,
86
+              content: entry.content,
87
+              title: entry.title,
88
+              authors: entry.authors,
89
+              categories: entry.categories
88 90
             })
89 91
           end
90 92
         end

+ 4 - 0
spec/models/agents/rss_agent_spec.rb

@@ -55,6 +55,10 @@ describe Agents::RssAgent do
55 55
       expect {
56 56
         agent.check
57 57
       }.to change { agent.events.count }.by(20)
58
+
59
+      event = agent.events.last
60
+      expect(event.payload['url']).to eq("https://github.com/cantino/huginn/commit/d0a844662846cf3c83b94c637c1803f03db5a5b0")
61
+      expect(event.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/d0a844662846cf3c83b94c637c1803f03db5a5b0"])
58 62
     end
59 63
 
60 64
     it "should track ids and not re-emit the same item when seen again" do